*Project: Ghana school feeding and learning 
* EA, 21 December 2017
* This do-file generates age-standardised z-scores for the educational outcomes, as is commonly done in the economics literature
 
**** Round 1 *****
***** GENERATE STANDARDISE SCORES ******
	**** FIRST REMOVE TESTER EFFECTS (BY ROUND): First, for each scale, tester/interviewer effects were netted out by running a regression of the raw scores on tester/interviewer dummies using Ordinary Least Squares (OLS). 
	
	* Prototype for maths only: age-standardised z-score in round 1.
	* NOTE(review): a stray block-comment terminator sits a few lines below this
	* section, which suggests the prototype was once commented out; as written it
	* is live code. Confirm whether it should still run.

	* Step 1: net out tester/interviewer effects, separately by round
	* (OLS of the raw score on tester dummies; keep the residuals)
	xi: reg maths i.enum2 if wave ==0
	predict math_temp_r1, residuals
	xi: reg maths i.iwerid if wave ==1
	predict math_temp_r2, residuals

	* Step 2: lowess of the residual on agemo within the control population
	* (arm2==0) gives the age-conditional mean. nograph skips the plot; only the
	* generated smoothed variable is used afterwards.
	lowess math_temp_r1 agemo if wave==0 & targetage1==1 & arm2==0, generate(math_temp_r1_la) bwidth(1.6) nograph

	* Step 3: interpolate the smoothed mean over agemo for the whole round-1
	* target sample (fills ages present in treatment but not in control)
	ipolate math_temp_r1_la agemo if wave==0 & targetage1==1, generate(temp_math_r1_meanage)

	* Step 4: deviation of the tester-adjusted score from the age-conditional
	* mean, and its square
	gen math_r1_v = math_temp_r1 - temp_math_r1_meanage if targetage1==1 & wave ==0
	gen math_resid2_r1_v = math_r1_v^2

	* Smooth the squared deviation in the control population to obtain the
	* age-conditional variance, then interpolate it like the mean
	lowess  math_resid2_r1_v agemo if wave==0 & targetage1==1 & arm2==0, generate(temp2_r1) bwidth(1.6) nograph
	ipolate temp2_r1 agemo if wave==0 & targetage1==1 , generate(math_r1_var)

	* Step 5: z-score = deviation / age-conditional SD, then re-standardised
	* against the round-1 control group
	gen zmath_r1 =(math_r1_v) / ((math_r1_var)^0.5)

	* summarize has to run right before the rescaling: the replace below reads
	* r(mean) and r(sd), which are otherwise not set by any preceding command here
	sum zmath_r1 if wave==0 & arm2==0
	replace zmath_r1 = (zmath_r1-r(mean)) / r(sd)
	
	* remove outliers >< 3sd
	
	*/
	****** Same procedure generalised to the 4 test scores (maths, lit, raven, digit), done round by round: round 1 first, round 2 (endline) further below
		
	* 1. Remove tester effects from the raw round-1 scores (wave==0):
	*    regress each score on enumerator dummies and keep the residuals
	foreach v of varlist maths lit raven digit {
		xi: reg `v' i.enum2 if wave ==0
		predict `v'_temp_r1, residuals
	}

	* xi leaves its _Ienum2_ dummies in the dataset. Drop them by prefix so the
	* command does not depend on which enumerator codes exist or on variable order.
	drop _Ienum2_*
	
	* Step 2: lowess of each tester-adjusted score on agemo within the control
	* population (arm2==0) of the round-1 target sample gives the age-conditional
	* mean. nograph skips the plot; only the generated variable is used.
	foreach v of varlist maths lit raven digit {
		lowess `v'_temp_r1 agemo if wave==0 & targetage1==1 & arm2==0, generate(`v'_temp_r1_la) bwidth(1.6) nograph
	}

	* Step 3: interpolate the smoothed mean over agemo for the full round-1
	* target sample (ages that are in treatment but not in control)
	foreach v of varlist maths lit raven digit {
		ipolate `v'_temp_r1_la agemo if wave==0 & targetage1==1, generate(temp_`v'_r1_meanage)
	}
	
	* Step 4: create the age-conditional variance
	* First the deviation of the tester-adjusted score from the age-conditional
	* mean, and its square
	foreach v of varlist maths lit raven digit {
		gen `v'_r1_v = `v'_temp_r1 - temp_`v'_r1_meanage if targetage1==1 & wave ==0
		gen `v'_resid2_r1_v = `v'_r1_v^2
	}

	* Then repeat the lowess on the squared deviation in the control group
	* (reference population) and interpolate over agemo for the rest of the
	* round-1 target sample. nograph skips the plot; only the generated
	* variable is used.
	foreach v of varlist maths lit raven digit {
		lowess  `v'_resid2_r1_v agemo if wave==0 & targetage1==1 & arm2==0, generate(temp2`v'_r1) bwidth(1.6) nograph
		ipolate temp2`v'_r1 agemo if wave==0 & targetage1==1 , generate(`v'_r1_var)
	}
	
	* Step 5: compute the round-1 z-scores
	* For each test: divide the deviation by the age-conditional SD, re-standardise
	* against the round-1 control group (wave==0 & arm2==0), label the result and
	* set values beyond +/-3 SD to missing.
	* The loop order keeps the variable creation order of the original stanzas.
	foreach v of varlist maths lit digit raven {
		gen z`v'_r1 =(`v'_r1_v) / ((`v'_r1_var)^0.5)
		* summarize must directly precede the rescaling, which reads r(mean) and r(sd)
		sum z`v'_r1 if wave==0 & arm2==0
		replace z`v'_r1 = (z`v'_r1-r(mean)) / r(sd)
		lab var z`v'_r1 "Z score for `v' relative to control in round 1"
		replace z`v'_r1=. if z`v'_r1<-3 | z`v'_r1 >3
	}

	* Drop the round-1 intermediates; this range relies on them having been
	* created contiguously, from maths_temp_r1 through digit_r1_var
	drop maths_temp_r1 - digit_r1_var
	****** ENDLINE ZSCORES ****

	* 1. Remove tester effects from the raw round-2 scores (wave==1):
	*    regress each score on interviewer dummies and keep the residuals
	foreach v of varlist maths lit raven digit {
		xi: reg `v' i.iwerid if wave ==1
		predict `v'_temp_r2, residuals
	}

	* xi leaves its _Iiwerid_ dummies in the dataset. Drop them by prefix so the
	* command does not depend on which interviewer codes exist or on variable order.
	drop _Iiwerid_*
	
	* Step 2: lowess of each tester-adjusted score on agemo within the control
	* population (arm2==0) of the round-2 sample (dummy_age2==1) gives the
	* age-conditional mean. nograph skips the plot; only the generated variable
	* is used.
	foreach v of varlist maths lit raven digit {
		lowess `v'_temp_r2 agemo if wave==1 & dummy_age2==1 & arm2==0, generate(`v'_temp_r2_la) bwidth(1.6) nograph
	}

	* Step 3: interpolate the smoothed mean over agemo for the full round-2
	* sample (ages that are in treatment but not in control)
	foreach v of varlist maths lit raven digit {
		ipolate `v'_temp_r2_la agemo if wave==1 & dummy_age2==1, generate(temp_`v'_r2_meanage)
	}
	
	* Step 4: create the age-conditional variance
	* First the deviation of the tester-adjusted score from the age-conditional
	* mean, and its square
	foreach v of varlist maths lit raven digit {
		gen `v'_r2_v = `v'_temp_r2 - temp_`v'_r2_meanage if dummy_age2==1 & wave ==1
		gen `v'_resid2_r2_v = `v'_r2_v^2
	}

	* Then smooth the squared deviation in the control group (reference
	* population) and interpolate over agemo for the rest of the round-2 sample.
	* nograph skips the plot; only the generated variable is used.
	foreach v of varlist maths lit raven digit {
		lowess  `v'_resid2_r2_v agemo if wave==1 & dummy_age2==1 & arm2==0, generate(temp2`v'_r2) bwidth(1.6) nograph
		ipolate temp2`v'_r2 agemo if wave==1 & dummy_age2==1 , generate(`v'_r2_var)
	}
	
	* Step 5: compute the round-2 z-scores
	* For each test: divide the deviation by the age-conditional SD, re-standardise
	* against the round-2 control group (wave==1 & arm2==0), label the result and
	* set values beyond +/-3 SD to missing.
	* The loop order keeps the variable creation order of the original stanzas.
	foreach v of varlist maths lit digit raven {
		gen z`v'_r2 =(`v'_r2_v) / ((`v'_r2_var)^0.5)
		* summarize must directly precede the rescaling, which reads r(mean) and r(sd)
		sum z`v'_r2 if wave==1 & arm2==0
		replace z`v'_r2 = (z`v'_r2-r(mean)) / r(sd)
		* these are round-2 scores, so the label names round 2
		lab var z`v'_r2 "Z score for `v' relative to control in round 2"
		replace z`v'_r2=. if z`v'_r2<-3 | z`v'_r2 >3
	}

	* Drop the round-2 intermediates; this range relies on them having been
	* created contiguously, from maths_temp_r2 through digit_r2_var
	drop maths_temp_r2 - digit_r2_var
	
	* Stack the round-specific z-scores into one variable per test:
	* round-1 values where wave==0, round-2 values where wave==1, missing otherwise
	foreach score of varlist maths lit raven digit {
		gen z`score' = cond(wave==0, z`score'_r1, cond(wave==1, z`score'_r2, .))
		label variable z`score' "Age-standardised `score' score"
	}

	* Keep only the identifiers, the treatment arm and the final z-scores
	keep u_id wave arm2 zmaths zlit zraven zdigit

	* Write the result to the output folder, overwriting any earlier version
	save "$output/cogs_standard", replace
	
	
	
	
	
	
	
	
